Load in CSV data
age_interval <- 1
naming_data <- read_csv("data/naming_colors_participants.csv") %>%
left_join(read_csv("data/naming_colors_data.csv"), by = 'subj')
grouping_data <- read_csv("data/grouping_colors_participants.csv") %>%
left_join(read_csv("data/grouping_colors_data.csv"), by = 'subj')
shipibo_child_data <- read_csv("data/shipibo_children_colors_participants.csv") %>%
left_join(read_csv("data/shipibo_children_colors_data.csv"), by = 'subj')
spanish_child_data <- read_csv("data/spanish_children_colors_participants.csv") %>%
left_join(read_csv("data/spanish_children_colors_data.csv"), by = 'subj')
color_chip_data <- read_csv("data/wcs_measures.csv", skip = 1)
string_spelling_list <- "`Ami` = c('ami'), `Ambi` = c('ambi'), `Barin Poi` = c('barin pui', 'barrin pui', 'barrinpui', 'pui', 'barin poi', 'barrin poi', 'bavrinpui*', 'barri'), `Bexnan` = c('berrnan', 'bexna', 'bexnan'), `Kari` = c('cari', 'carri', 'kari', 'karri'), `Chexe` = c('chese', 'chexe'), `Chimapo` = c('chimapu'), `Emo` = c('emu'), `Jimi` = c('jimi'), `Jisa` = c('jisa'), `Joshin` = c('joshin', 'joxin', 'toshin'), `Joxo` = c('josho', 'joxo'), `Kasho` = c('kashos'), `Keskiti` = c('kex keti'), `Koin` = c('kuin'), `Kononbi` = c('kunumbi'), `Konron` = c('korrum', 'kumrrum', 'kunrrum'), `Koro` = c('coro'), `Mai` = c('mai'), `Mandi` = c('mandi'), `Manxan` = c('manrran', 'manshan', 'manxam', 'manxan', 'maxan', 'maxna'), `Maxe` = c('maxe'), `Nai` = c('nai', 'nia'), `Oxne` = c('oshne'), `Pei` = c('pei'), `Poa` = c('pua'), `Pene` = c('pene'), `Panshin` = c('panshin'), `Pasna` = c('paxsna', 'pasna'), `Paxna` = c('parrna', 'paxna'), `Ranchesh` = c('ranchex'), `Spanish Term` = c('rojo', 'blanco', 'verde', 'amarillo', 'celeste', 'negro', 'morado', 'azul', 'marron', 'bioleta', 'verdesito', 'carne', 'naranjada', 'naranjado', 'amarilla', 'agua', 'agur', 'uva color*', 'violeta', 'pasto payota', 'naranja', 'chocolate', 'rosado', 'rosada', 'narranxa', 'anaranjado', 'coral', 'cerde', 'gris', 'oscuro', 'lila', 'azu', 'color cielo', 'cielo'), `Tena` = c('tena'), `Wiso` = c('wiso'), `Xena` = c('xena'), `Xo` = c('xo'), `Xexe` = c('xexe', 'xexi'), `Yame` = c('rayame', 'yame'), `Yankon` = c('rayanko', 'yankom', 'yankon', 'yankum', 'yankun', 'yankontani', 'yakon', 'yakun', 'yankoncha'), `NA` = c(NA)"
spelling_list <- eval(parse(text = paste0("c(",string_spelling_list,")")))
naming_data %<>%
mutate(color_cat = ifelse(is.na(color_cat), first_response, color_cat)) %>%
mutate(color_cat = ifelse(color_cat %in% unlist(spelling_list), color_cat, NA)) %>%
mutate(color_cat = eval( parse(text = gsub(pattern = "x", replacement = string_spelling_list, "forcats::fct_collapse(color_cat, x)")))
)
grouping_data %<>%
mutate(`nombre del grupo` = ifelse(`nombre del grupo` %in% unlist(spelling_list),
`nombre del grupo`, NA)) %>%
mutate(`nombre del grupo` = eval( parse(text = gsub(pattern = "x", replacement = string_spelling_list, "forcats::fct_collapse(`nombre del grupo`, x)")))
)
color_chip_data %<>%
mutate(hex = colorspace::hex(
colorspace::LAB(color_chip_data$`L*`, color_chip_data$`a*`,
color_chip_data$`b*`, color_chip_data$`#cnum`), fixup = T))Which terms appear to be basic and commonly used?
naming_data_profusion <- naming_data %>%
group_by(subj, color_cat) %>%
summarise(n = n()) %>%
group_by(color_cat) %>%
spread(subj, n, fill = 0) %>%
gather(key = 'subj', value = 'n', -color_cat) %>%
summarise(`% of Subjects Who Used the Term` = 100*sum(n > 0)/n(),
`Mean % of Chips in Set Labeled` = 100*mean(n)/165) %>%
dplyr::rename(`Color Term` = color_cat)
naming_list <- as.character(na.omit(filter(naming_data_profusion, `% of Subjects Who Used the Term` > 50 & !is.na(`Color Term`))$`Color Term`))
datatable(naming_data_profusion, rownames = FALSE)In the naming task with 165 color chips, commonly used terms include:
num_groups <- grouping_data %>%
filter(task == 1) %>%
group_by(subj) %>%
summarise(`# of Groups` = n_distinct(`nombre del grupo`)) %>%
ungroup() %>%
summarise(`Avg # of Groups` = mean(`# of Groups`),
`Min # of Groups` = min(`# of Groups`),
`Max # of Groups` = max(`# of Groups`))
grouping_data_profusion <- grouping_data %>%
filter(task == 1) %>%
group_by(subj, `nombre del grupo`) %>%
summarise(`cuantas tarjetas` = mean(`cuantas tarjetas`)) %>%
group_by(`nombre del grupo`) %>%
spread(subj, `cuantas tarjetas`, fill = 0) %>%
gather(key = 'subj', value = 'n', -`nombre del grupo`) %>%
summarise(`% of Subjects Who Used the Term` = 100*sum(n > 0)/n(),
`Mean % of Chips in Set Labeled` = 100*mean(n)/60) %>%
dplyr::rename(`Color Term` = `nombre del grupo`)
grouping_list <- as.character(na.omit(filter(grouping_data_profusion, `% of Subjects Who Used the Term` > 50 & !is.na(`Color Term`))$`Color Term`))
datatable(grouping_data_profusion, rownames = FALSE)In the grouping task with 60 chips, subjects usually create between 4-7 groups and mostly use terms like:
For each color chip, how many adults label it with the same term?
consensus <- 75
naming_consensus <- naming_data %>%
select(subj, chip_id, color_cat) %>%
mutate(set = ifelse((chip_id %% 2) == 0, 'even', 'odd')) %>%
split(.$set) %>%
map_df(function(x) {
x %>%
group_by(chip_id, color_cat) %>%
summarise(n = n()) %>%
group_by(chip_id) %>%
mutate(perc = 100*n/sum(n)) %>%
select(-n)
}) %>%
arrange(chip_id) %>%
rename(`Chip ID` = chip_id, `Color Term` = color_cat, `% of Subjects` = perc)
datatable(naming_consensus %>%
spread(`Color Term`, `% of Subjects`, fill = 0),
rownames = FALSE, fillContainer = TRUE)focal_terms <- pander::p(as.character(
unique(filter(naming_consensus,`% of Subjects` >= consensus)$`Color Term`)),
wrap = '', copula = ', and ')
color_chip_hexes <- color_chip_data[, c('#cnum', 'hex')]
highest_chips <- (naming_consensus %>% group_by(`Color Term`) %>%
filter(`% of Subjects` >= consensus & `% of Subjects` == max(`% of Subjects`)))$`Chip ID`
agreed_chips <- naming_consensus %>%
group_by(`Color Term`) %>%
filter(`% of Subjects` >= consensus) %>%
arrange(`Color Term`, `Chip ID`) %>%
left_join(color_chip_hexes,
by = c("Chip ID" = "#cnum")) %>%
dplyr::rename(`Hex Code` = hex) %>%
mutate(highest_chips = ifelse(`Chip ID` %in% highest_chips, 1, 0))
datatable(agreed_chips, rownames = FALSE,
options=list(columnDefs = list(list(
visible=FALSE, targets=c(grep('highest_chips', names(agreed_chips))-1))))) %>%
formatStyle('highest_chips', target = 'row',
fontWeight = styleEqual(c(0,1), c('normal','bold'))) %>%
formatStyle(columns = "Hex Code",
background = styleEqual(agreed_chips$`Hex Code`, agreed_chips$`Hex Code`))The only categories with chips that reach a high level of consensus appear to be Yankon, Joshin, Panshin, Joxo, and Wiso
Is there a similar amount of consensus on labeling between children and adults (in Shipibo)?
chip_set <- as.numeric(grep(pattern = "^[0-9]+$", unique(shipibo_child_data$response_1), value = T))
shipibo_1st_response <- shipibo_child_data %>%
mutate(age = ifelse(is.na(age), as.numeric(as.character(edad)), as.numeric(as.character(age)))) %>%
filter(task == 1) %>%
mutate(response_1 = ifelse(response_1 %in% unlist(spelling_list),
response_1, NA)) %>%
mutate(response_1 = eval( parse(text = gsub(pattern = "x", replacement = string_spelling_list,
"forcats::fct_collapse(response_1, x)")))
) %>%
mutate( age_ints = round(age/age_interval)*age_interval) %>%
select(subj, age, age_ints, prompt, response_1) %>%
split(.$age_ints) %>%
map_df(function(x) {
x %>%
mutate(response_1 = as.character(response_1)) %>%
spread(prompt, response_1, fill = 'No Response') %>%
gather(key = 'prompt', value = 'response', -subj, -age, -age_ints) %>%
group_by(age_ints, prompt, response) %>%
summarise(n = n()) %>%
group_by(age_ints, prompt) %>%
mutate(perc = 100*n/sum(n), n_total = sum(n))
}) %>% ungroup() %>%
mutate(prompt = as.numeric(as.character(forcats::fct_collapse(prompt,
`1` = c('celeste'),
`234` = c('verde'),
`245` = c('rojo'),
`274` = c('blanco'),
`297` = c('amarillo'),
`312` = c('negro'),
`320` = c('mierda sol'),
`325` = c('morado'))))) %>%
left_join(color_chip_hexes,
by = c("prompt" = "#cnum")) %>%
rename(Age = age_ints, `Chip ID` = prompt, `Color Term` = response,
`% of Subjects` = perc, `Hex Code` = hex) %>%
filter(n_total >= 4)
graph_colors <- c(
'Ambi' = '#874A8C',
'Ami' = '#76296E',
'Barin Poi' = '#6D6212',
'Bexnan' = '#B6D744',
'Chexe' = '#81C147',
'Chimapo' = '#003459',
'Emo' = '#007177',
'Jimi' = '#822158',
'Joshin' = '#BC1E47',
'Joxo' = '#F3F3F3',
'Kari' = '#571848',
'Kasho' = '#F07000',
'Keskiti' = '#E56F92',
'Koin' = '#50491D',
'Kononbi' = '#503B87',
'Konron' = '#BB8F00',
'Koro' = '#7B7B7B',
'Mai' = '#7F5A21',
'Mandi' = '#005637',
'Manxan' = '#FEBBA1',
'Maxe' = '#DC4800',
'Nai' = '#19A2C2',
'Oxne' = '#66BCC9',
'Panshin' = '#EDC800',
'Pasna' = '#D3C5DF',
'Paxna' = '#EC99A2',
'Pei' = '#69C360',
'Pene' = '#55471E',
'Poa' = '#7E4E94',
'Ranchesh' = '#4A2347',
'Tena' = '#C5D500',
'Yame' = '#666412',
'Yankon' = '#00A79E',
'Wiso' = '#272727',
'Xena' = '#D4799C',
'Xexe' = '#9769AE',
'Xo' = '#3A6E14',
'Spanish Term' = '#FF6E00'
)
adult_naming <- naming_consensus %>%
group_by(`Color Term`) %>%
mutate(Age = 18) %>%
arrange(`Chip ID`, `Color Term`) %>%
left_join(color_chip_hexes,
by = c("Chip ID" = "#cnum")) %>%
filter(`Chip ID` %in% chip_set & !is.na(`Color Term`)) %>%
dplyr::rename(`Hex Code` = hex)
naming_data_combined <- bind_rows(shipibo_1st_response, adult_naming)
term_prototypes <- naming_consensus %>%
group_by(`Color Term`) %>%
dplyr::arrange(`Color Term`, desc(`% of Subjects`)) %>%
slice(1:3) %>%
left_join(color_chip_hexes,
by = c("Chip ID" = "#cnum")) %>%
dplyr::rename(`Hex Code` = hex)
chip_set_data <- color_chip_data %>%
filter(`#cnum` %in% chip_set) %>%
select(`#cnum`, `L*`, `a*`, `b*`, hex) %>%
arrange(`#cnum`) %>%
rename(`Chip ID` = `#cnum`, `Hex Code` = hex)
datatable(chip_set_data, rownames = FALSE) %>%
formatStyle(columns = "Hex Code",
background = styleEqual(chip_set_data$`Hex Code`, chip_set_data$`Hex Code`))p <- ggplot(filter(naming_data_combined, Age < 18),
aes(x = Age, y = `% of Subjects`, group = `Color Term`, colour = `Color Term`)) +
facet_wrap(~`Chip ID`) +
geom_line(size = 1) +
geom_point( size=3) +
geom_point(data = filter(naming_data_combined, Age >= 18), size=3) +
scale_x_continuous(breaks = c(seq(6,12,2),18), labels = c(seq(6,12,2),'Adult')) +
scale_colour_manual(name = "Color Term",values = graph_colors) +
theme(panel.grid = element_blank())
ggplotly(p)